####################################################
#Author: Kelli Marquardt
#Purpose: Produce tables based on physician type dataset (A3, B1) 

# Inputs:
#- data/doc_dat_fake.csv 
#- data/intermediate/pcp_fe_dat.csv (produced in Code Step3_EstimateModel.R )

# Outputs:
#-  output/tables/tab_a3.txt, tab_b1.txt

####################################################


############################
#0 load required packages
############################
# Remove every object (including dot-prefixed names, via all.names = TRUE)
# from the environment this script is evaluated in, so later steps start clean.
# NOTE(review): when the script is source()d into an interactive session this
# also deletes the caller's objects -- confirm that is intended.
rm(list = ls(all.names = TRUE))

#load packages
# dplyr provides %>%, mutate(), if_else(), filter(), select(),
# summarise_all(), left_join() and row_number() used below.
library(dplyr) 




#################################################
#Step 1: read in both datasets 
#################################################

# Input 1: data/doc_dat_fake.csv (paths are relative to the parent directory
# of the working directory). Strings are kept as character, not factors.
doc_dat <- read.csv(
  file = file.path("..", "data", "doc_dat_fake.csv"),
  stringsAsFactors = FALSE
)

# Input 2: data/intermediate/pcp_fe_dat.csv, read the same way.
pcp_fe_dat <- read.csv(
  file = file.path("..", "data", "intermediate", "pcp_fe_dat.csv"),
  stringsAsFactors = FALSE
)

#################################################
#Step 2: Start with doc characteristic table (B1)
#################################################
###
# define the indicators with means for table 
# Add numeric 0/1 indicator columns for the categorical characteristics in
# doc_dat. A comparison that yields NA stays NA in the indicator.
doc_dat_full <- doc_dat %>%
  mutate(
    ## credentials
    cred_md = as.numeric(credentials == "md_do"),
    cred_ot = as.numeric(credentials == "other"),
    cred_un = as.numeric(credentials == "unknown"),

    ## specialty
    spec_ps = as.numeric(specialty == "psych"),
    spec_gm = as.numeric(specialty == "gen_med"),
    spec_ou = as.numeric(specialty == "other_unknown"),

    ## experience (omit 5to15)
    exp_le5  = as.numeric(experience == "le5"),
    exp_ge15 = as.numeric(experience == "ge15"),
    exp_un   = as.numeric(experience == "unknown"),

    ## education (omit int_medschool)
    edu_us = as.numeric(med_school == "us_medschool"),
    edu_un = as.numeric(med_school == "unknown")
  )


###
# build the 3 samples
# One sub-sample per membership flag: rows where SIP, IPCP or DP equals 1.
d_sip  <- filter(doc_dat_full, SIP == 1)
d_ipcp <- filter(doc_dat_full, IPCP == 1)
d_dp   <- filter(doc_dat_full, DP == 1)

###
# Column means for each sample.
# The order of this vector fixes the column positions (1..15) that the table
# code further down refers to by number, so it must not be rearranged.
mean_cols <- c(
  "SIP", "IPCP", "DP", "ever_pcp",
  "cred_md", "cred_ot", "cred_un",
  "spec_ps", "spec_gm", "spec_ou",
  "exp_le5", "exp_ge15", "exp_un",
  "edu_us", "edu_un"
)

d_sip_means  <- summarise_all(d_sip[mean_cols], mean)
d_ipcp_means <- summarise_all(d_ipcp[mean_cols], mean)
d_dp_means   <- summarise_all(d_dp[mean_cols], mean)

# The column list is only needed for the three summaries above.
rm(mean_cols)

###
#build the table 

# Format one body row of the physician-characteristics table as LaTeX.
#
# Args:
#   header:      row label for the first column (LaTeX markup allowed).
#   tab_var_num: column position of the statistic; the same position is read
#                from d_sip_means, d_ipcp_means and d_dp_means, which are
#                looked up in the enclosing (global) environment at call time.
#
# Returns:
#   A string "<header>  &  x.xxx &  x.xxx & x.xxx \\\\\n" with the three
#   values printed to three decimals.
get_mean_string <- function(header, tab_var_num) {
  # Only the numeric cells go through sprintf(). The label is pasted on
  # afterwards so that a literal "%" in it (e.g. LaTeX "\\%") is never parsed
  # as a conversion specification. `[[` pulls the column out as a plain
  # numeric vector instead of relying on sprintf() coercing a data frame.
  cells <- sprintf(
    " &  %.3f &  %.3f & %.3f \\\\\n",
    d_sip_means[[tab_var_num]],
    d_ipcp_means[[tab_var_num]],
    d_dp_means[[tab_var_num]]
  )

  # paste() inserts a single space between label and cells.
  paste(header, cells)
}

# Show the column order of the summary rows; the table code below addresses
# these columns by position, so this is a visual check when run interactively.
names(d_sip_means)

# Assemble the LaTeX source of table B1. Rows are grouped by table section and
# joined with a single space, inside local() so the pieces do not linger.
doc_table_print <- local({
  # Tabular preamble and the two-line column heading.
  head_rows <- c(
    "\\begin{tabular}{lccc} \n ",
    "\\toprule \n ",
    "& Sample Inclusion  & Initial Primary  & Diagnosing \\\\ \n",
    "& Physician (SIP) & Care Provider (IPCP) & Physician (DP)\\\\ \n",
    "\\midrule \n"
  )

  # Share of each sample that also belongs to SIP / IPCP / DP (own sample in
  # bold), followed by the "Any PCP" share.
  overlap_rows <- c(
    sprintf("SIP & \\textbf{%.2f}& %.2f & %.2f \\\\\n",
            d_sip_means[[1]], d_ipcp_means[[1]], d_dp_means[[1]]),
    sprintf("IPCP & %.2f & \\textbf{%.2f} & %.2f \\\\\n",
            d_sip_means[[2]], d_ipcp_means[[2]], d_dp_means[[2]]),
    sprintf("DP & %.2f & %.2f & \\textbf{%.2f} \\\\\n",
            d_sip_means[[3]], d_ipcp_means[[3]], d_dp_means[[3]]),
    "\\midrule \n",
    "\\addlinespace \n",
    sprintf("Any PCP & %.2f &  %.2f & %.2f \\\\\n",
            d_sip_means[[4]], d_ipcp_means[[4]], d_dp_means[[4]]),
    "\\midrule \n"
  )

  # Indicator means, columns 5 to 15 of the summary rows.
  trait_rows <- c(
    "\\textit{Credentials} & & & \\\\ \n",
    get_mean_string("\\hspace{3mm} MD/DO", 5),
    get_mean_string("\\hspace{3mm} Other", 6),
    get_mean_string("\\hspace{3mm} Unknown", 7),

    "\\textit{Specialty} & & & \\\\ \n",
    get_mean_string("\\hspace{3mm} Psych", 8),
    get_mean_string("\\hspace{3mm} General Medicine", 9),
    get_mean_string("\\hspace{3mm} Other/Unknown", 10),

    "\\textit{Experience} & & & \\\\ \n",
    get_mean_string("\\hspace{3mm} $\\leq$ 5 Years", 11),
    get_mean_string("\\hspace{3mm} $>$ 15 Years", 12),
    get_mean_string("\\hspace{3mm} Unknown", 13),

    "\\textit{Education} & & & \\\\ \n",
    get_mean_string("\\hspace{3mm} US Medical School", 14),
    get_mean_string("\\hspace{3mm} Unknown", 15)
  )

  # Sample sizes and the closing rules.
  foot_rows <- c(
    "\\midrule \n",
    sprintf("N & %d &  %d & %d \\\\\n",
            nrow(d_sip), nrow(d_ipcp), nrow(d_dp)),
    "\\bottomrule \n",
    "\\end{tabular} \n"
  )

  paste(c(head_rows, overlap_rows, trait_rows, foot_rows), collapse = " ")
})

# save
write(doc_table_print, file = file.path("..", "output", "tables", "tab_b1.txt"))

#clean up
rm(d_dp, d_dp_means, d_ipcp, d_ipcp_means, d_sip, d_sip_means, doc_table_print)
  


#################################################
#Step 3: Get the IPCP referral rate tercile comparison table (A3)
#################################################

#take pcp_fe_dat and merge with doc_dat_full
# Attach the physician indicators to each row of pcp_fe_dat, matching its
# first_pcp_id to doc_id; rows without a match keep NA in the joined columns.
pcp_fe_dat <- left_join(
  pcp_fe_dat,
  doc_dat_full,
  by = c("first_pcp_id" = "doc_id")
)

# Cut-offs of fe_est computed separately for male == 1 and male == 0 rows.
# NOTE(review): the probabilities are .33 and .66, not exact thirds.
male_tercile <- quantile(
  with(pcp_fe_dat, fe_est[which(male == 1)]),
  probs = c(.33, .66)
)
female_tercile <- quantile(
  with(pcp_fe_dat, fe_est[which(male == 0)]),
  probs = c(.33, .66)
)

###
# build the 6 samples
# Collapse one tercile sample into a single summary row.
#
# Args:
#   dat: the already-filtered rows of pcp_fe_dat for one gender/tercile cell.
#
# Returns:
#   A one-row data frame with the means of the 12 reported variables
#   (columns 1-12, in the order the table code expects) and the number of rows
#   in `dat` as integer column 13, `n_sample`.
summarise_tercile <- function(dat) {
  out <- dat %>%
    select(fe_est, xi_est, xi_se,
           npat, npatQ,
           cred_md, cred_ot, spec_ps, spec_gm, exp_le5, exp_ge15, edu_us) %>%
    summarise_all(mean)

  # Count rows directly. Deriving N as the mean of max(row_number()) gives NaN
  # for an empty sample, which a "%d" format cannot print.
  out$n_sample <- nrow(dat)
  out
}

# Low:    fe_est <= lower cut-off
# Middle: strictly between the two cut-offs
# High:   fe_est >= upper cut-off
male_low <- pcp_fe_dat %>%
  filter(male == 1 & fe_est <= male_tercile[1]) %>%
  summarise_tercile()

male_mid <- pcp_fe_dat %>%
  filter(male == 1 & fe_est > male_tercile[1] & fe_est < male_tercile[2]) %>%
  summarise_tercile()

male_high <- pcp_fe_dat %>%
  filter(male == 1 & fe_est >= male_tercile[2]) %>%
  summarise_tercile()

female_low <- pcp_fe_dat %>%
  filter(male == 0 & fe_est <= female_tercile[1]) %>%
  summarise_tercile()

female_mid <- pcp_fe_dat %>%
  filter(male == 0 & fe_est > female_tercile[1] & fe_est < female_tercile[2]) %>%
  summarise_tercile()

female_high <- pcp_fe_dat %>%
  filter(male == 0 & fe_est >= female_tercile[2]) %>%
  summarise_tercile()

# The helper is only needed for the six summaries above.
rm(summarise_tercile)




###
#build the table 

# Format one body row of the tercile comparison table as LaTeX.
#
# Args:
#   header:      row label for the first column (LaTeX markup allowed).
#   tab_var_num: column position of the statistic; the same position is read
#                from male_high, male_mid, male_low, female_high, female_mid
#                and female_low, which are looked up in the enclosing (global)
#                environment at call time.
#
# Returns:
#   A string "<header>  & x.xx & x.xx & x.xx & x.xx & x.xx & x.xx \\\\\n",
#   ordered top/middle/bottom tercile for males, then for females.
get_mean_string <- function(header, tab_var_num) {
  # Only the numeric cells go through sprintf(). The label is pasted on
  # afterwards so that a literal "%" in it (e.g. LaTeX "\\%") is never parsed
  # as a conversion specification. `[[` pulls the column out as a plain
  # numeric vector instead of relying on sprintf() coercing a data frame.
  cells <- sprintf(
    " & %.2f & %.2f & %.2f & %.2f & %.2f & %.2f \\\\\n",
    male_high[[tab_var_num]],
    male_mid[[tab_var_num]],
    male_low[[tab_var_num]],
    female_high[[tab_var_num]],
    female_mid[[tab_var_num]],
    female_low[[tab_var_num]]
  )

  # paste() inserts a single space between label and cells.
  paste(header, cells)
}

# Show the column order of the summary rows; the table code below addresses
# columns 1-12 by position, so this is a visual check when run interactively.
names(male_high)

# Assemble the LaTeX source of table A3 (7 columns: label + 3 male terciles
# + 3 female terciles). paste() joins the pieces with single spaces.
pcp_table_print <- paste(
  "\\begin{tabular}{lccc@{\\hskip 20pt}ccc} \n",
  "  \\toprule \n",
  "  & \\multicolumn{3}{c}{\\textbf{For Male Patients}} & \\multicolumn{3}{c}{\\textbf{For Female Patients}} \\\\ \n",
  "  \\cmidrule(lr){2-4} \\cmidrule(lr){5-7} \n",
  "  & Top & Middle & Bottom & Top & Middle & Bottom \\\\ \n",
  "  & Tercile & Tercile & Tercile & Tercile & Tercile & Tercile \\\\ \n",
  "  \\midrule \n",

  # Estimates (columns 1-3 of the summary rows).
  get_mean_string(header = "Referral Rate: $\\widehat{\\gamma}^\\theta_j$", 1),
  get_mean_string(header = "ADHD Match Average:  $\\widehat{\\delta}^\\theta_j$", 2),
  get_mean_string(header = "ADHD Match Precision: se($\\widehat{\\delta}^\\theta_j$) ", 3),

  # Patient counts (columns 4-5).
  "\\addlinespace \n",
  get_mean_string(header = "\\# of Patients", 4),
  get_mean_string(header = "\\# of Patients ($Q_i=1$)", 5),
  "\\addlinespace \n",

  # Physician indicators (columns 6-12).
  "\\textit{Credentials}  & & & & & & \\\\ \n",
  get_mean_string("\\hspace{3mm} MD/DO", 6),
  get_mean_string("\\hspace{3mm} Other", 7),

  "\\textit{Specialty}  & & & & & & \\\\ \n",
  get_mean_string("\\hspace{3mm} Psych", 8),
  get_mean_string("\\hspace{3mm} General Medicine", 9),

  "\\textit{Experience} & & & & & &  \\\\ \n",
  get_mean_string("\\hspace{3mm} $\\leq$ 5 Years", 10),
  get_mean_string("\\hspace{3mm} $>$ 15 Years", 11),

  # This heading row carries six separators like the other section headings,
  # matching the seven-column tabular.
  "\\textit{Education} & & & & & & \\\\ \n",
  get_mean_string("\\hspace{3mm} US Medical School", 12),

  # Sample sizes, read by column name rather than by position.
  "\\midrule \n",
  sprintf("N & %d & %d & %d & %d & %d & %d \\\\\n",
          male_high[["n_sample"]], male_mid[["n_sample"]], male_low[["n_sample"]],
          female_high[["n_sample"]], female_mid[["n_sample"]], female_low[["n_sample"]]),

  "\\bottomrule \n",
  "\\end{tabular} \n"
)


# save
write(pcp_table_print, file.path("..", "output", "tables", "tab_a3.txt"))

#clean up
rm(doc_dat_full)
rm(female_high, female_mid, female_low, male_high, male_mid, male_low)
rm(female_tercile, male_tercile, pcp_table_print, get_mean_string)

#END OF SCRIPT









